require(base)
require(data.table)
## Loading required package: data.table
require(datasets)
require(data.sets)
## Loading required package: data.sets
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'data.sets'
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(forecast)
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
require(GGally)
## Loading required package: GGally
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
require(ggplot2)
require(ggstats)
## Loading required package: ggstats
require(graphics)
require(grDevices)
require(lubridate)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
require(methods)
require(readr)
## Loading required package: readr
require(stats)
require(utils)
require(RcppRoll)
## Loading required package: RcppRoll
require(tseries)
## Loading required package: tseries
library(readr)
library(ggplot2)
library(dplyr)
library(ggcorrplot)
library(ggridges)

processed_weather <- read_csv("processed_weather.csv", 
    col_types = cols(date = col_date(format = "%Y-%m-%d"), 
        hour = col_time(format = "%H")))
View(processed_weather)
production <- read_csv("production.csv", 
    col_types = cols(date = col_date(format = "%Y-%m-%d"), 
        hour = col_time(format = "%H")))
View(production)
processed_weather$location <- paste(processed_weather$lat, processed_weather$lon, sep = "_" )
setDT(processed_weather)
weather_wide = dcast(processed_weather, date + hour ~ location, value.var = c("dswrf_surface", "tcdc_low.cloud.layer","tcdc_middle.cloud.layer","tcdc_high.cloud.layer","tcdc_entire.atmosphere","uswrf_top_of_atmosphere","csnow_surface","dlwrf_surface","uswrf_surface","tmp_surface"))
combined_data <- merge(production, weather_wide, by = c("date","hour"), all.x = TRUE, all.y = FALSE)
setDT(combined_data)
combined_data[,datetime := as.POSIXct(paste(combined_data$date,combined_data$hour), format = "%Y-%m-%d %H:%M:%S")]
combined_data[,mon:=as.character(month(date,label=T))]
hour_id <- 0:23
combined_data <- cbind(combined_data,hour_id)
combined_data$is_sun <-0
condition_1 <- combined_data$hour_id > 5
condition_2 <- combined_data$hour_id < 20
combined_data$is_sun[condition_1 & condition_2] = 1
combined_data$avg_temp <- rowMeans(combined_data[,229:253])
combined_data$avg_DSWRF <- rowMeans(combined_data[,4:28])
combined_data$avg_lcloud <- rowMeans(combined_data[,29:53])
combined_data$avg_mcloud <- rowMeans(combined_data[,54:78])
combined_data$avg_hcloud <- rowMeans(combined_data[,79:103])
combined_data$avg_tcloud <- rowMeans(combined_data[,104:128])
combined_data$avg_USWRFtop <- rowMeans(combined_data[,129:153])
combined_data$avg_DLWRFsurf <- rowMeans(combined_data[,179:203])
combined_data$avg_USWRFsurf <- rowMeans(combined_data[,204:228])
combined_data$avg_CSNOW <- rowMeans(combined_data[,154:178])
daily_productions <- production %>%
  group_by(date) %>%
  summarize(daily_prod = sum(production))
setDT(daily_productions)

weekly_productions <- production %>%
  group_by(year = year(date), week = isoweek(date)) %>%
  summarize(weekly_prod = sum(production))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
setDT(weekly_productions)
weekly_productions[,weeks:=1:.N]

monthly_productions <- production %>%
  group_by(year = year(date), month = month(date)) %>%
  summarize(monthly_prod = sum(production))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
setDT(monthly_productions)
monthly_productions[,months:=1:.N]
ggplot(combined_data, aes(x = date, y = production)) + geom_line() + labs(title = "Hourly Energy Production Data", x = "Date", y = "Energy Production ")

ggplot(daily_productions, aes(x = date, y = daily_prod)) + geom_line() + labs(title = "Daily Energy Production Data", x = "Date", y = "Energy Production ")

ggplot(weekly_productions, aes(x = weeks, y = weekly_prod)) + geom_line() + labs(title = "Weekly Energy Production Data", x = "Date", y = "Energy Production ")

ggplot(monthly_productions, aes(x = months, y = monthly_prod)) + geom_line()+ labs(title = "Monthly Energy Production Data", x = "Date", y = "Energy Production ")

plot(acf(combined_data$production,100), main = "Hourly Energy Production Autocorrelation")

plot(acf(daily_productions$daily_prod,365), main = "Daily Energy Production Autocorrelation")

plot(acf(weekly_productions$weekly_prod,108), main = "Weekly Energy Production Autocorrelation")

plot(acf(monthly_productions$monthly_prod), main = "Monthly Energy Production Autocorrelation")

plot(pacf(combined_data$production,72), main = "Hourly Energy Production Partial Autocorrelation")

plot(pacf(daily_productions$daily_prod), main = "Daily Energy Production Partial Autocorrelation")

production_available <- combined_data %>%
  filter(is_sun == 1)
ggplot(production_available[date == "2023-05-15"], aes(x = datetime, y = production)) + geom_line()

ggplot(combined_data[date > "2024-02-01" & date < "2024-05-15"], aes(x = datetime, y = production)) + geom_line() + labs(title = "Hourly Energy Production Data 01.02.2024-15.05.2024", x = "Date", y = "Energy Production ")

corr_check <- data.frame(production_available$production, production_available$datetime,production_available$avg_DLWRFsurf, production_available$avg_temp, production_available$avg_hcloud, production_available$avg_lcloud, production_available$avg_mcloud, production_available$avg_tcloud, production_available$avg_USWRFsurf, production_available$avg_USWRFtop, production_available$avg_CSNOW, production_available$avg_DSWRF)
ggpairs(corr_check)
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 4 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values

## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).

ggplot(production_available, aes(x = avg_temp, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average Temp", x = "Average Temp", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_DSWRF, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average DSWRF Surface", x = "Average DSWRF", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_tcloud, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average Total Cloud", x = "Average Total Cloud", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_hcloud, y = production)) + geom_line() + geom_smooth(method = "loess") +  labs(title = "Hourly Energy Production vs. Average High Cloud", x = "Average High Cloud", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_mcloud, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average Middle Cloud", x = "Average Middle Cloud", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_lcloud, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average Low Cloud", x = "Average Low Cloud", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_USWRFsurf, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average USWRF Surface", x = "Average USWRF Surface", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_USWRFtop, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average USWRF Atmosphere", x = "Average USWRF Top", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_DLWRFsurf, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average DLWRF Surface", x = "Average DLWRF", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_CSNOW, y = production)) + geom_point() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average Snow", x = "Average Snow", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : at -0.005
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : radius 2.5e-05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.005
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 0.005
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : zero-width neighborhood. make span bigger
## Warning: Failed to fit group -1.
## Caused by error in `predLoess()`:
## ! NA/NaN/Inf in foreign function call (arg 5)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

ggplot(production_available, aes(x = datetime, y = production)) + geom_line() + geom_smooth(method = "loess") + geom_smooth(method = "lm") + labs(title = "Hourly Energy Production Smoothed", x = "Time", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

production_available[,Lag14:= shift(production,14)]
## Warning: Invalid .internal.selfref detected and fixed by taking a (shallow)
## copy of the data.table so that := can add this new column by reference. At an
## earlier point, this data.table has been copied by R (or was created manually
## using structure() or similar). Avoid names<- and attr<- which in R currently
## (and oddly) may copy the whole data.table. Use set* syntax instead to avoid
## copying: ?set, ?setnames and ?setattr. If this message doesn't help, please
## report your use case to the data.table issue tracker so the root cause can be
## fixed or this message improved.
production_available[,Lag1 := shift(production,1)]
production_available[,diff14 := production-Lag14]
Test_date_start = "2024-02-01"
Test_date_end = "2024-05-15"

model_data_tr <- production_available[date < Test_date_start]
model_data_test <- production_available[date >= Test_date_start & date <= Test_date_end]
tsr1 <- lm(production ~ mon + as.factor(hour_id) + Lag14 + Lag1, model_data_tr)
summary(tsr1)
## 
## Call:
## lm(formula = production ~ mon + as.factor(hour_id) + Lag14 + 
##     Lag1, data = model_data_tr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.8897 -0.6897  0.0730  0.7902  8.6023 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           0.488431   0.071663   6.816 9.89e-12 ***
## monAug                0.237178   0.071359   3.324 0.000891 ***
## monDec               -0.223461   0.071485  -3.126 0.001777 ** 
## monFeb               -0.077109   0.072529  -1.063 0.287740    
## monJan               -0.300394   0.066204  -4.537 5.76e-06 ***
## monJul                0.274782   0.071707   3.832 0.000128 ***
## monJun                0.112372   0.071191   1.578 0.114489    
## monMar               -0.089938   0.070668  -1.273 0.203158    
## monMay                0.071019   0.070524   1.007 0.313946    
## monNov               -0.177527   0.071579  -2.480 0.013148 *  
## monOct                0.014136   0.070471   0.201 0.841023    
## monSep                0.187090   0.071629   2.612 0.009016 ** 
## as.factor(hour_id)7   1.435912   0.076003  18.893  < 2e-16 ***
## as.factor(hour_id)8   2.187998   0.081315  26.908  < 2e-16 ***
## as.factor(hour_id)9   1.995202   0.089157  22.379  < 2e-16 ***
## as.factor(hour_id)10  1.196201   0.094637  12.640  < 2e-16 ***
## as.factor(hour_id)11  0.879963   0.096433   9.125  < 2e-16 ***
## as.factor(hour_id)12  0.720929   0.096501   7.471 8.60e-14 ***
## as.factor(hour_id)13  0.396919   0.095199   4.169 3.08e-05 ***
## as.factor(hour_id)14 -0.210469   0.091806  -2.293 0.021893 *  
## as.factor(hour_id)15 -0.962148   0.086275 -11.152  < 2e-16 ***
## as.factor(hour_id)16 -1.579514   0.080379 -19.651  < 2e-16 ***
## as.factor(hour_id)17 -1.361516   0.076353 -17.832  < 2e-16 ***
## as.factor(hour_id)18 -1.020975   0.075107 -13.594  < 2e-16 ***
## as.factor(hour_id)19 -0.530419   0.074803  -7.091 1.42e-12 ***
## Lag14                 0.163682   0.006824  23.988  < 2e-16 ***
## Lag1                  0.677217   0.006832  99.123  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.456 on 10613 degrees of freedom
##   (14 observations deleted due to missingness)
## Multiple R-squared:  0.8649, Adjusted R-squared:  0.8646 
## F-statistic:  2613 on 26 and 10613 DF,  p-value: < 2.2e-16
checkresiduals(tsr1)

## 
##  Breusch-Godfrey test for serial correlation of order up to 30
## 
## data:  Residuals
## LM test = 774.59, df = 30, p-value < 2.2e-16
prediction_data_tsr = copy(model_data_test)
prediction_data_tsr[, actual:= model_data_test$production]
prediction_data_tsr[, prediction_model1 := predict(tsr1,model_data_test)]
prediction_data_tsr$prediction_model1[prediction_data_tsr$prediction_model1 < 0] = 0
prediction_data_tsr[, residual_model1 := actual - prediction_model1]
ggplot(prediction_data_tsr, aes(x = datetime)) + geom_line(aes(y = actual, color = "actual")) + geom_line(aes(y = prediction_model1, color = "model 1"))

tsr2 <- lm(production ~ -1 +mon + as.factor(hour_id) + Lag14 + Lag1 + avg_temp + avg_DSWRF + avg_lcloud + avg_mcloud + avg_hcloud + avg_tcloud + avg_USWRFtop + avg_DLWRFsurf + avg_USWRFsurf + avg_CSNOW, model_data_tr)
summary(tsr2)
## 
## Call:
## lm(formula = production ~ -1 + mon + as.factor(hour_id) + Lag14 + 
##     Lag1 + avg_temp + avg_DSWRF + avg_lcloud + avg_mcloud + avg_hcloud + 
##     avg_tcloud + avg_USWRFtop + avg_DLWRFsurf + avg_USWRFsurf + 
##     avg_CSNOW, data = model_data_tr)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0197  -0.7158   0.0781   0.7668   8.0608 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## monApr               -6.2325540  1.5064692  -4.137 3.54e-05 ***
## monAug               -6.0827269  1.5438259  -3.940 8.20e-05 ***
## monDec               -6.9003819  1.4722342  -4.687 2.81e-06 ***
## monFeb               -6.5021370  1.4640603  -4.441 9.04e-06 ***
## monJan               -6.9197600  1.4682702  -4.713 2.47e-06 ***
## monJul               -5.9998300  1.5300240  -3.921 8.86e-05 ***
## monJun               -5.9474117  1.5146957  -3.926 8.67e-05 ***
## monMar               -6.3202323  1.4862227  -4.253 2.13e-05 ***
## monMay               -6.0845687  1.5117701  -4.025 5.74e-05 ***
## monNov               -6.7954957  1.4878781  -4.567 5.00e-06 ***
## monOct               -6.5232630  1.5070606  -4.328 1.52e-05 ***
## monSep               -6.3087997  1.5296954  -4.124 3.75e-05 ***
## as.factor(hour_id)7   1.4339621  0.0748292  19.163  < 2e-16 ***
## as.factor(hour_id)8   2.2528233  0.0833386  27.032  < 2e-16 ***
## as.factor(hour_id)9   2.1429245  0.0992002  21.602  < 2e-16 ***
## as.factor(hour_id)10  1.9946090  0.1505606  13.248  < 2e-16 ***
## as.factor(hour_id)11  1.7473705  0.1655432  10.555  < 2e-16 ***
## as.factor(hour_id)12  1.6441303  0.1761312   9.335  < 2e-16 ***
## as.factor(hour_id)13  1.3667894  0.1816623   7.524 5.75e-14 ***
## as.factor(hour_id)14  0.7955475  0.1818304   4.375 1.22e-05 ***
## as.factor(hour_id)15  0.0448128  0.1771898   0.253 0.800344    
## as.factor(hour_id)16 -0.7263122  0.1547507  -4.693 2.72e-06 ***
## as.factor(hour_id)17 -0.6585855  0.1364411  -4.827 1.41e-06 ***
## as.factor(hour_id)18 -0.4325847  0.1181362  -3.662 0.000252 ***
## as.factor(hour_id)19 -0.0285958  0.1037890  -0.276 0.782923    
## Lag14                 0.1591265  0.0068441  23.250  < 2e-16 ***
## Lag1                  0.6224335  0.0078475  79.316  < 2e-16 ***
## avg_temp              0.0348264  0.0061725   5.642 1.72e-08 ***
## avg_DSWRF            -0.0018410  0.0002163  -8.513  < 2e-16 ***
## avg_lcloud            0.0014841  0.0011342   1.308 0.190736    
## avg_mcloud           -0.0016616  0.0008028  -2.070 0.038511 *  
## avg_hcloud            0.0003151  0.0008407   0.375 0.707853    
## avg_tcloud           -0.0040692  0.0010503  -3.874 0.000108 ***
## avg_USWRFtop         -0.0002658  0.0003787  -0.702 0.482779    
## avg_DLWRFsurf        -0.0094946  0.0011758  -8.075 7.47e-16 ***
## avg_USWRFsurf         0.0010828  0.0005541   1.954 0.050699 .  
## avg_CSNOW            -0.2428656  0.1122221  -2.164 0.030475 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.423 on 10599 degrees of freedom
##   (18 observations deleted due to missingness)
## Multiple R-squared:  0.9427, Adjusted R-squared:  0.9425 
## F-statistic:  4713 on 37 and 10599 DF,  p-value: < 2.2e-16
checkresiduals(tsr2)

## 
##  Breusch-Godfrey test for serial correlation of order up to 40
## 
## data:  Residuals
## LM test = 837.64, df = 40, p-value < 2.2e-16
prediction_data_tsr[, prediction_model2 := predict(tsr2,model_data_test)]
prediction_data_tsr$prediction_model2[prediction_data_tsr$prediction_model2 < 0] = 0
prediction_data_tsr[, residual_model2 := actual - prediction_model2]
ggplot(prediction_data_tsr, aes(x = datetime)) + geom_line(aes(y = actual, color = "actual")) + geom_line(aes(y = prediction_model2, color = "model 2"))

tsr3 <- lm(production ~ -1 +mon + as.factor(hour_id) + Lag14 + Lag1 + avg_temp + avg_DSWRF + avg_mcloud + avg_tcloud + avg_DLWRFsurf + avg_USWRFsurf + avg_CSNOW, model_data_tr)
summary(tsr3)
## 
## Call:
## lm(formula = production ~ -1 + mon + as.factor(hour_id) + Lag14 + 
##     Lag1 + avg_temp + avg_DSWRF + avg_mcloud + avg_tcloud + avg_DLWRFsurf + 
##     avg_USWRFsurf + avg_CSNOW, data = model_data_tr)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0155  -0.7242   0.0792   0.7732   8.0814 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## monApr               -5.8846140  1.3035009  -4.514 6.42e-06 ***
## monAug               -5.7429828  1.3455591  -4.268 1.99e-05 ***
## monDec               -6.5311525  1.2882731  -5.070 4.05e-07 ***
## monFeb               -6.1402056  1.2729887  -4.823 1.43e-06 ***
## monJan               -6.5499829  1.2812006  -5.112 3.24e-07 ***
## monJul               -5.6573904  1.3270224  -4.263 2.03e-05 ***
## monJun               -5.6116927  1.3103816  -4.282 1.86e-05 ***
## monMar               -5.9629716  1.2871068  -4.633 3.65e-06 ***
## monMay               -5.7412767  1.3046463  -4.401 1.09e-05 ***
## monNov               -6.4302950  1.3005722  -4.944 7.76e-07 ***
## monOct               -6.1679612  1.3153529  -4.689 2.78e-06 ***
## monSep               -5.9588546  1.3324405  -4.472 7.82e-06 ***
## as.factor(hour_id)7   1.4363436  0.0744875  19.283  < 2e-16 ***
## as.factor(hour_id)8   2.2580297  0.0808278  27.936  < 2e-16 ***
## as.factor(hour_id)9   2.1497321  0.0915056  23.493  < 2e-16 ***
## as.factor(hour_id)10  1.9578034  0.1081488  18.103  < 2e-16 ***
## as.factor(hour_id)11  1.7081429  0.1148161  14.877  < 2e-16 ***
## as.factor(hour_id)12  1.5980308  0.1195390  13.368  < 2e-16 ***
## as.factor(hour_id)13  1.3216388  0.1217332  10.857  < 2e-16 ***
## as.factor(hour_id)14  0.7461996  0.1207838   6.178 6.73e-10 ***
## as.factor(hour_id)15 -0.0094597  0.1170861  -0.081   0.9356    
## as.factor(hour_id)16 -0.7783211  0.1030807  -7.551 4.69e-14 ***
## as.factor(hour_id)17 -0.7084445  0.0947727  -7.475 8.32e-14 ***
## as.factor(hour_id)18 -0.4777300  0.0885034  -5.398 6.89e-08 ***
## as.factor(hour_id)19 -0.0678920  0.0840929  -0.807   0.4195    
## Lag14                 0.1581962  0.0068244  23.181  < 2e-16 ***
## Lag1                  0.6226021  0.0078272  79.544  < 2e-16 ***
## avg_temp              0.0332317  0.0052299   6.354 2.18e-10 ***
## avg_DSWRF            -0.0017787  0.0002011  -8.846  < 2e-16 ***
## avg_mcloud           -0.0018571  0.0007863  -2.362   0.0182 *  
## avg_tcloud           -0.0036635  0.0007093  -5.165 2.45e-07 ***
## avg_DLWRFsurf        -0.0090989  0.0009695  -9.386  < 2e-16 ***
## avg_USWRFsurf         0.0008952  0.0004951   1.808   0.0706 .  
## avg_CSNOW            -0.1613856  0.0972504  -1.659   0.0970 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.423 on 10603 degrees of freedom
##   (17 observations deleted due to missingness)
## Multiple R-squared:  0.9427, Adjusted R-squared:  0.9425 
## F-statistic:  5127 on 34 and 10603 DF,  p-value: < 2.2e-16
checkresiduals(tsr3)

## 
##  Breusch-Godfrey test for serial correlation of order up to 37
## 
## data:  Residuals
## LM test = 824.68, df = 37, p-value < 2.2e-16
prediction_data_tsr[, prediction_model3 := predict(tsr3,model_data_test)]
prediction_data_tsr$prediction_model3[prediction_data_tsr$prediction_model3 < 0] = 0
prediction_data_tsr[, residual_model3 := actual - prediction_model3]
ggplot(prediction_data_tsr, aes(x = datetime)) + geom_line(aes(y = actual, color = "actual")) + geom_line(aes(y = prediction_model3, color = "model 3"))

tsr4 <- lm(production ~ -1 +mon:as.factor(hour_id) + Lag14 + Lag1 + avg_temp + avg_DSWRF + avg_lcloud + avg_tcloud +  avg_DLWRFsurf + avg_USWRFsurf + avg_CSNOW, model_data_tr)
summary(tsr4)
## 
## Call:
## lm(formula = production ~ -1 + mon:as.factor(hour_id) + Lag14 + 
##     Lag1 + avg_temp + avg_DSWRF + avg_lcloud + avg_tcloud + avg_DLWRFsurf + 
##     avg_USWRFsurf + avg_CSNOW, data = model_data_tr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.7083 -0.5642  0.0647  0.6801  8.5590 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## Lag14                        9.524e-02  6.821e-03  13.963  < 2e-16 ***
## Lag1                         6.371e-01  7.797e-03  81.712  < 2e-16 ***
## avg_temp                     9.127e-02  7.887e-03  11.573  < 2e-16 ***
## avg_DSWRF                   -4.300e-03  3.990e-04 -10.777  < 2e-16 ***
## avg_lcloud                   1.926e-03  9.408e-04   2.047   0.0407 *  
## avg_tcloud                  -3.565e-03  6.315e-04  -5.646 1.68e-08 ***
## avg_DLWRFsurf               -1.521e-02  1.276e-03 -11.919  < 2e-16 ***
## avg_USWRFsurf                5.044e-03  6.704e-04   7.523 5.78e-14 ***
## avg_CSNOW                   -1.887e-01  1.043e-01  -1.810   0.0704 .  
## monApr:as.factor(hour_id)6  -2.060e+01  1.937e+00 -10.634  < 2e-16 ***
## monAug:as.factor(hour_id)6  -2.049e+01  1.952e+00 -10.498  < 2e-16 ***
## monDec:as.factor(hour_id)6  -2.082e+01  1.917e+00 -10.860  < 2e-16 ***
## monFeb:as.factor(hour_id)6  -2.052e+01  1.894e+00 -10.831  < 2e-16 ***
## monJan:as.factor(hour_id)6  -2.073e+01  1.906e+00 -10.878  < 2e-16 ***
## monJul:as.factor(hour_id)6  -1.984e+01  1.961e+00 -10.117  < 2e-16 ***
## monJun:as.factor(hour_id)6  -1.985e+01  1.959e+00 -10.133  < 2e-16 ***
## monMar:as.factor(hour_id)6  -2.057e+01  1.915e+00 -10.743  < 2e-16 ***
## monMay:as.factor(hour_id)6  -2.060e+01  1.948e+00 -10.571  < 2e-16 ***
## monNov:as.factor(hour_id)6  -2.090e+01  1.929e+00 -10.836  < 2e-16 ***
## monOct:as.factor(hour_id)6  -2.077e+01  1.941e+00 -10.699  < 2e-16 ***
## monSep:as.factor(hour_id)6  -2.051e+01  1.948e+00 -10.530  < 2e-16 ***
## monApr:as.factor(hour_id)7  -1.827e+01  1.948e+00  -9.379  < 2e-16 ***
## monAug:as.factor(hour_id)7  -1.925e+01  1.984e+00  -9.703  < 2e-16 ***
## monDec:as.factor(hour_id)7  -2.042e+01  1.916e+00 -10.660  < 2e-16 ***
## monFeb:as.factor(hour_id)7  -1.970e+01  1.893e+00 -10.406  < 2e-16 ***
## monJan:as.factor(hour_id)7  -2.053e+01  1.905e+00 -10.777  < 2e-16 ***
## monJul:as.factor(hour_id)7  -1.843e+01  1.987e+00  -9.273  < 2e-16 ***
## monJun:as.factor(hour_id)7  -1.828e+01  1.978e+00  -9.243  < 2e-16 ***
## monMar:as.factor(hour_id)7  -1.884e+01  1.915e+00  -9.839  < 2e-16 ***
## monMay:as.factor(hour_id)7  -1.825e+01  1.970e+00  -9.267  < 2e-16 ***
## monNov:as.factor(hour_id)7  -1.940e+01  1.926e+00 -10.073  < 2e-16 ***
## monOct:as.factor(hour_id)7  -1.809e+01  1.938e+00  -9.332  < 2e-16 ***
## monSep:as.factor(hour_id)7  -1.833e+01  1.959e+00  -9.356  < 2e-16 ***
## monApr:as.factor(hour_id)8  -1.905e+01  1.968e+00  -9.680  < 2e-16 ***
## monAug:as.factor(hour_id)8  -1.820e+01  2.026e+00  -8.984  < 2e-16 ***
## monDec:as.factor(hour_id)8  -1.804e+01  1.914e+00  -9.429  < 2e-16 ***
## monFeb:as.factor(hour_id)8  -1.776e+01  1.896e+00  -9.365  < 2e-16 ***
## monJan:as.factor(hour_id)8  -1.855e+01  1.903e+00  -9.743  < 2e-16 ***
## monJul:as.factor(hour_id)8  -1.837e+01  2.011e+00  -9.136  < 2e-16 ***
## monJun:as.factor(hour_id)8  -1.867e+01  1.992e+00  -9.372  < 2e-16 ***
## monMar:as.factor(hour_id)8  -1.768e+01  1.927e+00  -9.177  < 2e-16 ***
## monMay:as.factor(hour_id)8  -1.838e+01  1.985e+00  -9.257  < 2e-16 ***
## monNov:as.factor(hour_id)8  -1.797e+01  1.932e+00  -9.301  < 2e-16 ***
## monOct:as.factor(hour_id)8  -1.774e+01  1.959e+00  -9.056  < 2e-16 ***
## monSep:as.factor(hour_id)8  -1.821e+01  1.998e+00  -9.111  < 2e-16 ***
## monApr:as.factor(hour_id)9  -1.845e+01  1.985e+00  -9.292  < 2e-16 ***
## monAug:as.factor(hour_id)9  -1.927e+01  2.057e+00  -9.369  < 2e-16 ***
## monDec:as.factor(hour_id)9  -1.780e+01  1.929e+00  -9.226  < 2e-16 ***
## monFeb:as.factor(hour_id)9  -1.723e+01  1.915e+00  -8.995  < 2e-16 ***
## monJan:as.factor(hour_id)9  -1.825e+01  1.917e+00  -9.521  < 2e-16 ***
## monJul:as.factor(hour_id)9  -1.876e+01  2.029e+00  -9.247  < 2e-16 ***
## monJun:as.factor(hour_id)9  -1.870e+01  2.003e+00  -9.335  < 2e-16 ***
## monMar:as.factor(hour_id)9  -1.820e+01  1.943e+00  -9.364  < 2e-16 ***
## monMay:as.factor(hour_id)9  -1.895e+01  1.998e+00  -9.487  < 2e-16 ***
## monNov:as.factor(hour_id)9  -1.839e+01  1.956e+00  -9.401  < 2e-16 ***
## monOct:as.factor(hour_id)9  -1.861e+01  1.989e+00  -9.357  < 2e-16 ***
## monSep:as.factor(hour_id)9  -1.863e+01  2.031e+00  -9.169  < 2e-16 ***
## monApr:as.factor(hour_id)10 -1.793e+01  1.911e+00  -9.381  < 2e-16 ***
## monAug:as.factor(hour_id)10 -1.795e+01  1.970e+00  -9.114  < 2e-16 ***
## monDec:as.factor(hour_id)10 -1.833e+01  1.919e+00  -9.548  < 2e-16 ***
## monFeb:as.factor(hour_id)10 -1.809e+01  1.907e+00  -9.483  < 2e-16 ***
## monJan:as.factor(hour_id)10 -1.872e+01  1.911e+00  -9.794  < 2e-16 ***
## monJul:as.factor(hour_id)10 -1.790e+01  1.931e+00  -9.273  < 2e-16 ***
## monJun:as.factor(hour_id)10 -1.785e+01  1.902e+00  -9.386  < 2e-16 ***
## monMar:as.factor(hour_id)10 -1.827e+01  1.909e+00  -9.568  < 2e-16 ***
## monMay:as.factor(hour_id)10 -1.798e+01  1.903e+00  -9.449  < 2e-16 ***
## monNov:as.factor(hour_id)10 -1.867e+01  1.929e+00  -9.679  < 2e-16 ***
## monOct:as.factor(hour_id)10 -1.840e+01  1.941e+00  -9.480  < 2e-16 ***
## monSep:as.factor(hour_id)10 -1.832e+01  1.962e+00  -9.341  < 2e-16 ***
## monApr:as.factor(hour_id)11 -1.829e+01  1.914e+00  -9.558  < 2e-16 ***
## monAug:as.factor(hour_id)11 -1.841e+01  1.978e+00  -9.307  < 2e-16 ***
## monDec:as.factor(hour_id)11 -1.883e+01  1.927e+00  -9.774  < 2e-16 ***
## monFeb:as.factor(hour_id)11 -1.822e+01  1.914e+00  -9.518  < 2e-16 ***
## monJan:as.factor(hour_id)11 -1.913e+01  1.919e+00  -9.971  < 2e-16 ***
## monJul:as.factor(hour_id)11 -1.801e+01  1.936e+00  -9.305  < 2e-16 ***
## monJun:as.factor(hour_id)11 -1.803e+01  1.903e+00  -9.478  < 2e-16 ***
## monMar:as.factor(hour_id)11 -1.846e+01  1.915e+00  -9.641  < 2e-16 ***
## monMay:as.factor(hour_id)11 -1.840e+01  1.906e+00  -9.655  < 2e-16 ***
## monNov:as.factor(hour_id)11 -1.892e+01  1.938e+00  -9.765  < 2e-16 ***
## monOct:as.factor(hour_id)11 -1.865e+01  1.951e+00  -9.557  < 2e-16 ***
## monSep:as.factor(hour_id)11 -1.854e+01  1.972e+00  -9.397  < 2e-16 ***
## monApr:as.factor(hour_id)12 -1.831e+01  1.914e+00  -9.566  < 2e-16 ***
## monAug:as.factor(hour_id)12 -1.816e+01  1.981e+00  -9.168  < 2e-16 ***
## monDec:as.factor(hour_id)12 -1.920e+01  1.931e+00  -9.944  < 2e-16 ***
## monFeb:as.factor(hour_id)12 -1.893e+01  1.917e+00  -9.873  < 2e-16 ***
## monJan:as.factor(hour_id)12 -1.942e+01  1.922e+00 -10.101  < 2e-16 ***
## monJul:as.factor(hour_id)12 -1.798e+01  1.937e+00  -9.281  < 2e-16 ***
## monJun:as.factor(hour_id)12 -1.810e+01  1.900e+00  -9.526  < 2e-16 ***
## monMar:as.factor(hour_id)12 -1.859e+01  1.917e+00  -9.698  < 2e-16 ***
## monMay:as.factor(hour_id)12 -1.780e+01  1.904e+00  -9.351  < 2e-16 ***
## monNov:as.factor(hour_id)12 -1.932e+01  1.942e+00  -9.945  < 2e-16 ***
## monOct:as.factor(hour_id)12 -1.896e+01  1.955e+00  -9.697  < 2e-16 ***
## monSep:as.factor(hour_id)12 -1.863e+01  1.976e+00  -9.427  < 2e-16 ***
## monApr:as.factor(hour_id)13 -1.881e+01  1.911e+00  -9.844  < 2e-16 ***
## monAug:as.factor(hour_id)13 -1.864e+01  1.975e+00  -9.435  < 2e-16 ***
## monDec:as.factor(hour_id)13 -1.993e+01  1.930e+00 -10.329  < 2e-16 ***
## monFeb:as.factor(hour_id)13 -1.878e+01  1.917e+00  -9.798  < 2e-16 ***
## monJan:as.factor(hour_id)13 -1.956e+01  1.921e+00 -10.182  < 2e-16 ***
## monJul:as.factor(hour_id)13 -1.798e+01  1.933e+00  -9.302  < 2e-16 ***
## monJun:as.factor(hour_id)13 -1.794e+01  1.895e+00  -9.471  < 2e-16 ***
## monMar:as.factor(hour_id)13 -1.899e+01  1.916e+00  -9.910  < 2e-16 ***
## monMay:as.factor(hour_id)13 -1.813e+01  1.896e+00  -9.562  < 2e-16 ***
## monNov:as.factor(hour_id)13 -2.001e+01  1.942e+00 -10.303  < 2e-16 ***
## monOct:as.factor(hour_id)13 -1.934e+01  1.953e+00  -9.905  < 2e-16 ***
## monSep:as.factor(hour_id)13 -1.858e+01  1.972e+00  -9.423  < 2e-16 ***
## monApr:as.factor(hour_id)14 -1.869e+01  1.905e+00  -9.811  < 2e-16 ***
## monAug:as.factor(hour_id)14 -1.894e+01  1.963e+00  -9.646  < 2e-16 ***
## monDec:as.factor(hour_id)14 -2.115e+01  1.926e+00 -10.982  < 2e-16 ***
## monFeb:as.factor(hour_id)14 -1.956e+01  1.914e+00 -10.220  < 2e-16 ***
## monJan:as.factor(hour_id)14 -2.026e+01  1.917e+00 -10.567  < 2e-16 ***
## monJul:as.factor(hour_id)14 -1.836e+01  1.926e+00  -9.532  < 2e-16 ***
## monJun:as.factor(hour_id)14 -1.855e+01  1.886e+00  -9.836  < 2e-16 ***
## monMar:as.factor(hour_id)14 -1.901e+01  1.912e+00  -9.943  < 2e-16 ***
## monMay:as.factor(hour_id)14 -1.861e+01  1.888e+00  -9.860  < 2e-16 ***
## monNov:as.factor(hour_id)14 -2.045e+01  1.936e+00 -10.565  < 2e-16 ***
## monOct:as.factor(hour_id)14 -2.042e+01  1.944e+00 -10.504  < 2e-16 ***
## monSep:as.factor(hour_id)14 -1.957e+01  1.961e+00  -9.978  < 2e-16 ***
## monApr:as.factor(hour_id)15 -1.923e+01  1.893e+00 -10.159  < 2e-16 ***
## monAug:as.factor(hour_id)15 -1.953e+01  1.946e+00 -10.036  < 2e-16 ***
## monDec:as.factor(hour_id)15 -2.182e+01  1.919e+00 -11.375  < 2e-16 ***
## monFeb:as.factor(hour_id)15 -2.090e+01  1.911e+00 -10.938  < 2e-16 ***
## monJan:as.factor(hour_id)15 -2.114e+01  1.910e+00 -11.071  < 2e-16 ***
## monJul:as.factor(hour_id)15 -1.930e+01  1.915e+00 -10.077  < 2e-16 ***
## monJun:as.factor(hour_id)15 -1.867e+01  1.875e+00  -9.955  < 2e-16 ***
## monMar:as.factor(hour_id)15 -1.994e+01  1.905e+00 -10.464  < 2e-16 ***
## monMay:as.factor(hour_id)15 -1.852e+01  1.878e+00  -9.861  < 2e-16 ***
## monNov:as.factor(hour_id)15 -2.174e+01  1.926e+00 -11.288  < 2e-16 ***
## monOct:as.factor(hour_id)15 -2.137e+01  1.932e+00 -11.058  < 2e-16 ***
## monSep:as.factor(hour_id)15 -2.054e+01  1.947e+00 -10.550  < 2e-16 ***
## monApr:as.factor(hour_id)16 -2.034e+01  1.904e+00 -10.685  < 2e-16 ***
## monAug:as.factor(hour_id)16 -2.077e+01  1.951e+00 -10.645  < 2e-16 ***
## monDec:as.factor(hour_id)16 -2.168e+01  1.926e+00 -11.254  < 2e-16 ***
## monFeb:as.factor(hour_id)16 -2.167e+01  1.907e+00 -11.364  < 2e-16 ***
## monJan:as.factor(hour_id)16 -2.169e+01  1.910e+00 -11.352  < 2e-16 ***
## monJul:as.factor(hour_id)16 -2.064e+01  1.919e+00 -10.759  < 2e-16 ***
## monJun:as.factor(hour_id)16 -2.018e+01  1.903e+00 -10.603  < 2e-16 ***
## monMar:as.factor(hour_id)16 -2.124e+01  1.907e+00 -11.133  < 2e-16 ***
## monMay:as.factor(hour_id)16 -2.022e+01  1.901e+00 -10.641  < 2e-16 ***
## monNov:as.factor(hour_id)16 -2.189e+01  1.941e+00 -11.274  < 2e-16 ***
## monOct:as.factor(hour_id)16 -2.219e+01  1.951e+00 -11.373  < 2e-16 ***
## monSep:as.factor(hour_id)16 -2.201e+01  1.956e+00 -11.254  < 2e-16 ***
## monApr:as.factor(hour_id)17 -2.081e+01  1.904e+00 -10.931  < 2e-16 ***
## monAug:as.factor(hour_id)17 -2.168e+01  1.944e+00 -11.149  < 2e-16 ***
## monDec:as.factor(hour_id)17 -2.075e+01  1.913e+00 -10.847  < 2e-16 ***
## monFeb:as.factor(hour_id)17 -2.136e+01  1.901e+00 -11.235  < 2e-16 ***
## monJan:as.factor(hour_id)17 -2.076e+01  1.901e+00 -10.916  < 2e-16 ***
## monJul:as.factor(hour_id)17 -2.170e+01  1.918e+00 -11.315  < 2e-16 ***
## monJun:as.factor(hour_id)17 -2.073e+01  1.905e+00 -10.883  < 2e-16 ***
## monMar:as.factor(hour_id)17 -2.114e+01  1.907e+00 -11.086  < 2e-16 ***
## monMay:as.factor(hour_id)17 -2.071e+01  1.901e+00 -10.891  < 2e-16 ***
## monNov:as.factor(hour_id)17 -2.097e+01  1.931e+00 -10.858  < 2e-16 ***
## monOct:as.factor(hour_id)17 -2.144e+01  1.944e+00 -11.029  < 2e-16 ***
## monSep:as.factor(hour_id)17 -2.214e+01  1.948e+00 -11.364  < 2e-16 ***
## monApr:as.factor(hour_id)18 -2.124e+01  1.902e+00 -11.168  < 2e-16 ***
## monAug:as.factor(hour_id)18 -2.183e+01  1.934e+00 -11.284  < 2e-16 ***
## monDec:as.factor(hour_id)18 -2.072e+01  1.911e+00 -10.844  < 2e-16 ***
## monFeb:as.factor(hour_id)18 -2.028e+01  1.884e+00 -10.764  < 2e-16 ***
## monJan:as.factor(hour_id)18 -2.042e+01  1.892e+00 -10.793  < 2e-16 ***
## monJul:as.factor(hour_id)18 -2.124e+01  1.914e+00 -11.094  < 2e-16 ***
## monJun:as.factor(hour_id)18 -2.090e+01  1.906e+00 -10.970  < 2e-16 ***
## monMar:as.factor(hour_id)18 -2.032e+01  1.900e+00 -10.694  < 2e-16 ***
## monMay:as.factor(hour_id)18 -2.130e+01  1.902e+00 -11.203  < 2e-16 ***
## monNov:as.factor(hour_id)18 -2.077e+01  1.927e+00 -10.780  < 2e-16 ***
## monOct:as.factor(hour_id)18 -2.075e+01  1.931e+00 -10.747  < 2e-16 ***
## monSep:as.factor(hour_id)18 -2.121e+01  1.935e+00 -10.964  < 2e-16 ***
## monApr:as.factor(hour_id)19 -2.016e+01  1.897e+00 -10.623  < 2e-16 ***
## monAug:as.factor(hour_id)19 -2.052e+01  1.915e+00 -10.712  < 2e-16 ***
## monDec:as.factor(hour_id)19 -2.078e+01  1.914e+00 -10.856  < 2e-16 ***
## monFeb:as.factor(hour_id)19 -2.026e+01  1.882e+00 -10.766  < 2e-16 ***
## monJan:as.factor(hour_id)19 -2.050e+01  1.896e+00 -10.810  < 2e-16 ***
## monJul:as.factor(hour_id)19 -2.052e+01  1.906e+00 -10.766  < 2e-16 ***
## monJun:as.factor(hour_id)19 -2.020e+01  1.905e+00 -10.603  < 2e-16 ***
## monMar:as.factor(hour_id)19 -2.009e+01  1.892e+00 -10.618  < 2e-16 ***
## monMay:as.factor(hour_id)19 -2.012e+01  1.899e+00 -10.596  < 2e-16 ***
## monNov:as.factor(hour_id)19 -2.082e+01  1.929e+00 -10.792  < 2e-16 ***
## monOct:as.factor(hour_id)19 -2.082e+01  1.934e+00 -10.764  < 2e-16 ***
## monSep:as.factor(hour_id)19 -2.058e+01  1.918e+00 -10.733  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.355 on 10460 degrees of freedom
##   (17 observations deleted due to missingness)
## Multiple R-squared:  0.9488, Adjusted R-squared:  0.9479 
## F-statistic:  1094 on 177 and 10460 DF,  p-value: < 2.2e-16
checkresiduals(tsr4)

## 
##  Breusch-Godfrey test for serial correlation of order up to 180
## 
## data:  Residuals
## LM test = 552.2, df = 180, p-value < 2.2e-16
prediction_data_tsr[, prediction_model4 := predict(tsr4,model_data_test)]
prediction_data_tsr$prediction_model4[prediction_data_tsr$prediction_model4 < 0] = 0
prediction_data_tsr[, residual_model4 := actual - prediction_model4]
ggplot(prediction_data_tsr, aes(x = datetime)) + geom_line(aes(y = actual, color = "actual")) + geom_line(aes(y = prediction_model4, color = "model 4"))

sum_actual = sum(prediction_data_tsr$actual)
sum_error1 = sum(abs(prediction_data_tsr$residual_model1))
sum_error2 = sum(abs(prediction_data_tsr$residual_model2))
sum_error3 = sum(abs(prediction_data_tsr$residual_model3))
sum_error4 = sum(abs(prediction_data_tsr$residual_model4))
wmape_df <- data_frame(model = c("Model 1", "Model 2", "Model 3", "Model 4"),WMAPE = c(sum_error1/sum_actual, sum_error2/sum_actual, sum_error3/sum_actual, sum_error4/sum_actual), Model_Type = c("Time Series Regression","Time Series Regression","Time Series Regression","Time Series Regression"))
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
wmape_df
kpss.test(model_data_tr$production, null = "Trend")
## Warning in kpss.test(model_data_tr$production, null = "Trend"): p-value smaller
## than printed p-value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  model_data_tr$production
## KPSS Trend = 2.4359, Truncation lag parameter = 12, p-value = 0.01
ggplot(model_data_tr, aes(x = datetime, y = diff14)) + geom_line() + labs(title = "Diff14 Production Data", x = "Date Time", y = "Production Diff14")
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_line()`).

kpss.test(model_data_tr$diff14, null = "Trend")
## Warning in kpss.test(model_data_tr$diff14, null = "Trend"): p-value greater
## than printed p-value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  model_data_tr$diff14
## KPSS Trend = 0.0022202, Truncation lag parameter = 12, p-value = 0.1
plot(acf(model_data_tr[complete.cases(model_data_tr)]$diff14), main = "ACF for Diff14 Production")

plot(pacf(model_data_tr[complete.cases(model_data_tr)]$diff14), main = "PACF for Diff14 Production")

arima_model <- auto.arima(production_available$diff14, seasonal = F, trace = T, stepwise = F, approximation = F)
## 
##  ARIMA(0,0,0) with zero mean     : 56645.24
##  ARIMA(0,0,0) with non-zero mean : 56647.24
##  ARIMA(0,0,1) with zero mean     : 51520.45
##  ARIMA(0,0,1) with non-zero mean : 51522.45
##  ARIMA(0,0,2) with zero mean     : 49765.64
##  ARIMA(0,0,2) with non-zero mean : 49767.64
##  ARIMA(0,0,3) with zero mean     : 49259.44
##  ARIMA(0,0,3) with non-zero mean : 49261.44
##  ARIMA(0,0,4) with zero mean     : 48988.43
##  ARIMA(0,0,4) with non-zero mean : 48990.43
##  ARIMA(0,0,5) with zero mean     : 48936.12
##  ARIMA(0,0,5) with non-zero mean : 48938.13
##  ARIMA(1,0,0) with zero mean     : 48861.46
##  ARIMA(1,0,0) with non-zero mean : 48863.47
##  ARIMA(1,0,1) with zero mean     : 48859.42
##  ARIMA(1,0,1) with non-zero mean : 48861.42
##  ARIMA(1,0,2) with zero mean     : 48853.09
##  ARIMA(1,0,2) with non-zero mean : 48855.09
##  ARIMA(1,0,3) with zero mean     : 48854.23
##  ARIMA(1,0,3) with non-zero mean : 48856.24
##  ARIMA(1,0,4) with zero mean     : 48851.29
##  ARIMA(1,0,4) with non-zero mean : 48853.29
##  ARIMA(2,0,0) with zero mean     : 48859.11
##  ARIMA(2,0,0) with non-zero mean : 48861.11
##  ARIMA(2,0,1) with zero mean     : 48851.28
##  ARIMA(2,0,1) with non-zero mean : 48853.28
##  ARIMA(2,0,2) with zero mean     : 48850.45
##  ARIMA(2,0,2) with non-zero mean : 48852.43
##  ARIMA(2,0,3) with zero mean     : 48852.42
##  ARIMA(2,0,3) with non-zero mean : 48854.38
##  ARIMA(3,0,0) with zero mean     : 48852.47
##  ARIMA(3,0,0) with non-zero mean : 48854.47
##  ARIMA(3,0,1) with zero mean     : 48850.52
##  ARIMA(3,0,1) with non-zero mean : 48852.46
##  ARIMA(3,0,2) with zero mean     : Inf
##  ARIMA(3,0,2) with non-zero mean : Inf
##  ARIMA(4,0,0) with zero mean     : 48853.58
##  ARIMA(4,0,0) with non-zero mean : 48855.58
##  ARIMA(4,0,1) with zero mean     : Inf
##  ARIMA(4,0,1) with non-zero mean : Inf
##  ARIMA(5,0,0) with zero mean     : 48850.23
##  ARIMA(5,0,0) with non-zero mean : 48852.23
## 
## 
## 
##  Best model: ARIMA(5,0,0) with zero mean
arima_model
## Series: production_available$diff14 
## ARIMA(5,0,0) with zero mean 
## 
## Coefficients:
##         ar1     ar2      ar3     ar4      ar5
##       0.674  0.0358  -0.0316  0.0226  -0.0209
## s.e.  0.009  0.0109   0.0109  0.0109   0.0090
## 
## sigma^2 = 3.17:  log likelihood = -24419.11
## AIC=48850.22   AICc=48850.23   BIC=48894.69
plot(acf(arima_model$residuals), main = "ACF for ARIMA(5,0,0) Residuals")

plot(pacf(arima_model$residuals), main = "PACF for ARIMA(5,0,0) Residuals")

checkresiduals(arima_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(5,0,0) with zero mean
## Q* = 13.651, df = 5, p-value = 0.01798
## 
## Model df: 5.   Total lags used: 10
fc_period = length(model_data_test$Lag14)
summary(arima_model)
## Series: production_available$diff14 
## ARIMA(5,0,0) with zero mean 
## 
## Coefficients:
##         ar1     ar2      ar3     ar4      ar5
##       0.674  0.0358  -0.0316  0.0226  -0.0209
## s.e.  0.009  0.0109   0.0109  0.0109   0.0090
## 
## sigma^2 = 3.17:  log likelihood = -24419.11
## AIC=48850.22   AICc=48850.23   BIC=48894.69
## 
## Training set error measures:
##                         ME     RMSE      MAE MPE MAPE      MASE         ACF1
## Training set -0.0002946464 1.780177 1.097426 NaN  Inf 0.9225724 0.0001199595
forecasted_diff_arima <- forecast(arima_model,fc_period)
prediction_data_arima <- copy(model_data_test)
prediction_data_arima[, actual := production]
prediction_data_arima[, prediction_diff_model5 := forecasted_diff_arima$mean]
prediction_data_arima[, prediction_model5 := prediction_diff_model5 + Lag14]
prediction_data_arima[, residual_model5 := actual - prediction_model5]
ggplot(prediction_data_arima, aes(x = datetime)) + geom_line(aes(y = actual, color = "actual")) + geom_line(aes(y = prediction_model5, color = "model 5"))

sum_actual = sum(prediction_data_arima$actual)
sum_error5 = sum(abs(prediction_data_arima$residual_model5))
wmape_df = rbind(wmape_df, c("Model 5", sum_error5/sum_actual, "ARIMA(5,0,0)"))
wmape_df
ts_data <- ts(model_data_tr$diff14, frequency = 14)
sarima_model <- auto.arima(ts_data, seasonal = T, trace = T, stepwise = F, approximation = F)
## 
##  ARIMA(0,0,0)            with zero mean     : 49037.71
##  ARIMA(0,0,0)            with non-zero mean : 49039.71
##  ARIMA(0,0,0)(0,0,1)[14] with zero mean     : 45355.3
##  ARIMA(0,0,0)(0,0,1)[14] with non-zero mean : 45357.29
##  ARIMA(0,0,0)(0,0,2)[14] with zero mean     : Inf
##  ARIMA(0,0,0)(0,0,2)[14] with non-zero mean : Inf
##  ARIMA(0,0,0)(1,0,0)[14] with zero mean     : 47091.93
##  ARIMA(0,0,0)(1,0,0)[14] with non-zero mean : 47093.91
##  ARIMA(0,0,0)(1,0,1)[14] with zero mean     : Inf
##  ARIMA(0,0,0)(1,0,1)[14] with non-zero mean : Inf
##  ARIMA(0,0,0)(1,0,2)[14] with zero mean     : Inf
##  ARIMA(0,0,0)(1,0,2)[14] with non-zero mean : Inf
##  ARIMA(0,0,0)(2,0,0)[14] with zero mean     : 46505.86
##  ARIMA(0,0,0)(2,0,0)[14] with non-zero mean : 46507.86
##  ARIMA(0,0,0)(2,0,1)[14] with zero mean     : Inf
##  ARIMA(0,0,0)(2,0,1)[14] with non-zero mean : Inf
##  ARIMA(0,0,0)(2,0,2)[14] with zero mean     : Inf
##  ARIMA(0,0,0)(2,0,2)[14] with non-zero mean : Inf
##  ARIMA(0,0,1)            with zero mean     : 44809.25
##  ARIMA(0,0,1)            with non-zero mean : 44811.25
##  ARIMA(0,0,1)(0,0,1)[14] with zero mean     : 40354.14
##  ARIMA(0,0,1)(0,0,1)[14] with non-zero mean : 40356.12
##  ARIMA(0,0,1)(0,0,2)[14] with zero mean     : Inf
##  ARIMA(0,0,1)(0,0,2)[14] with non-zero mean : Inf
##  ARIMA(0,0,1)(1,0,0)[14] with zero mean     : 42521.71
##  ARIMA(0,0,1)(1,0,0)[14] with non-zero mean : 42523.7
##  ARIMA(0,0,1)(1,0,1)[14] with zero mean     : Inf
##  ARIMA(0,0,1)(1,0,1)[14] with non-zero mean : Inf
##  ARIMA(0,0,1)(1,0,2)[14] with zero mean     : Inf
##  ARIMA(0,0,1)(1,0,2)[14] with non-zero mean : Inf
##  ARIMA(0,0,1)(2,0,0)[14] with zero mean     : 41827.75
##  ARIMA(0,0,1)(2,0,0)[14] with non-zero mean : 41829.75
##  ARIMA(0,0,1)(2,0,1)[14] with zero mean     : Inf
##  ARIMA(0,0,1)(2,0,1)[14] with non-zero mean : Inf
##  ARIMA(0,0,1)(2,0,2)[14] with zero mean     : Inf
##  ARIMA(0,0,1)(2,0,2)[14] with non-zero mean : Inf
##  ARIMA(0,0,2)            with zero mean     : 43343.38
##  ARIMA(0,0,2)            with non-zero mean : 43345.38
##  ARIMA(0,0,2)(0,0,1)[14] with zero mean     : Inf
##  ARIMA(0,0,2)(0,0,1)[14] with non-zero mean : Inf
##  ARIMA(0,0,2)(0,0,2)[14] with zero mean     : Inf
##  ARIMA(0,0,2)(0,0,2)[14] with non-zero mean : Inf
##  ARIMA(0,0,2)(1,0,0)[14] with zero mean     : 40865.9
##  ARIMA(0,0,2)(1,0,0)[14] with non-zero mean : 40867.89
##  ARIMA(0,0,2)(1,0,1)[14] with zero mean     : Inf
##  ARIMA(0,0,2)(1,0,1)[14] with non-zero mean : Inf
##  ARIMA(0,0,2)(1,0,2)[14] with zero mean     : Inf
##  ARIMA(0,0,2)(1,0,2)[14] with non-zero mean : Inf
##  ARIMA(0,0,2)(2,0,0)[14] with zero mean     : 40131.26
##  ARIMA(0,0,2)(2,0,0)[14] with non-zero mean : 40133.26
##  ARIMA(0,0,2)(2,0,1)[14] with zero mean     : Inf
##  ARIMA(0,0,2)(2,0,1)[14] with non-zero mean : Inf
##  ARIMA(0,0,3)            with zero mean     : 42930.45
##  ARIMA(0,0,3)            with non-zero mean : 42932.45
##  ARIMA(0,0,3)(0,0,1)[14] with zero mean     : Inf
##  ARIMA(0,0,3)(0,0,1)[14] with non-zero mean : Inf
##  ARIMA(0,0,3)(0,0,2)[14] with zero mean     : Inf
##  ARIMA(0,0,3)(0,0,2)[14] with non-zero mean : Inf
##  ARIMA(0,0,3)(1,0,0)[14] with zero mean     : 40361.65
##  ARIMA(0,0,3)(1,0,0)[14] with non-zero mean : 40363.64
##  ARIMA(0,0,3)(1,0,1)[14] with zero mean     : Inf
##  ARIMA(0,0,3)(1,0,1)[14] with non-zero mean : Inf
##  ARIMA(0,0,3)(2,0,0)[14] with zero mean     : 39577.93
##  ARIMA(0,0,3)(2,0,0)[14] with non-zero mean : 39579.93
##  ARIMA(0,0,4)            with zero mean     : 42707.16
##  ARIMA(0,0,4)            with non-zero mean : 42709.17
##  ARIMA(0,0,4)(0,0,1)[14] with zero mean     : Inf
##  ARIMA(0,0,4)(0,0,1)[14] with non-zero mean : Inf
##  ARIMA(0,0,4)(1,0,0)[14] with zero mean     : 40117.35
##  ARIMA(0,0,4)(1,0,0)[14] with non-zero mean : 40119.35
##  ARIMA(0,0,5)            with zero mean     : 42659.94
##  ARIMA(0,0,5)            with non-zero mean : 42661.94
##  ARIMA(1,0,0)            with zero mean     : 42620
##  ARIMA(1,0,0)            with non-zero mean : 42622
##  ARIMA(1,0,0)(0,0,1)[14] with zero mean     : Inf
##  ARIMA(1,0,0)(0,0,1)[14] with non-zero mean : Inf
##  ARIMA(1,0,0)(0,0,2)[14] with zero mean     : Inf
##  ARIMA(1,0,0)(0,0,2)[14] with non-zero mean : Inf
##  ARIMA(1,0,0)(1,0,0)[14] with zero mean     : 39967.77
##  ARIMA(1,0,0)(1,0,0)[14] with non-zero mean : 39969.77
##  ARIMA(1,0,0)(1,0,1)[14] with zero mean     : Inf
##  ARIMA(1,0,0)(1,0,1)[14] with non-zero mean : Inf
##  ARIMA(1,0,0)(1,0,2)[14] with zero mean     : Inf
##  ARIMA(1,0,0)(1,0,2)[14] with non-zero mean : Inf
##  ARIMA(1,0,0)(2,0,0)[14] with zero mean     : 39139.21
##  ARIMA(1,0,0)(2,0,0)[14] with non-zero mean : 39141.21
##  ARIMA(1,0,0)(2,0,1)[14] with zero mean     : Inf
##  ARIMA(1,0,0)(2,0,1)[14] with non-zero mean : Inf
##  ARIMA(1,0,0)(2,0,2)[14] with zero mean     : Inf
##  ARIMA(1,0,0)(2,0,2)[14] with non-zero mean : Inf
##  ARIMA(1,0,1)            with zero mean     : 42612.56
##  ARIMA(1,0,1)            with non-zero mean : 42614.56
##  ARIMA(1,0,1)(0,0,1)[14] with zero mean     : Inf
##  ARIMA(1,0,1)(0,0,1)[14] with non-zero mean : Inf
##  ARIMA(1,0,1)(0,0,2)[14] with zero mean     : Inf
##  ARIMA(1,0,1)(0,0,2)[14] with non-zero mean : Inf
##  ARIMA(1,0,1)(1,0,0)[14] with zero mean     : 39962.12
##  ARIMA(1,0,1)(1,0,0)[14] with non-zero mean : 39964.12
##  ARIMA(1,0,1)(1,0,1)[14] with zero mean     : Inf
##  ARIMA(1,0,1)(1,0,1)[14] with non-zero mean : Inf
##  ARIMA(1,0,1)(1,0,2)[14] with zero mean     : Inf
##  ARIMA(1,0,1)(1,0,2)[14] with non-zero mean : Inf
##  ARIMA(1,0,1)(2,0,0)[14] with zero mean     : 39133.78
##  ARIMA(1,0,1)(2,0,0)[14] with non-zero mean : 39135.78
##  ARIMA(1,0,1)(2,0,1)[14] with zero mean     : Inf
##  ARIMA(1,0,1)(2,0,1)[14] with non-zero mean : Inf
##  ARIMA(1,0,2)            with zero mean     : 42603.68
##  ARIMA(1,0,2)            with non-zero mean : 42605.68
##  ARIMA(1,0,2)(0,0,1)[14] with zero mean     : Inf
##  ARIMA(1,0,2)(0,0,1)[14] with non-zero mean : Inf
##  ARIMA(1,0,2)(0,0,2)[14] with zero mean     : Inf
##  ARIMA(1,0,2)(0,0,2)[14] with non-zero mean : Inf
##  ARIMA(1,0,2)(1,0,0)[14] with zero mean     : 39950.64
##  ARIMA(1,0,2)(1,0,0)[14] with non-zero mean : 39952.64
##  ARIMA(1,0,2)(1,0,1)[14] with zero mean     : Inf
##  ARIMA(1,0,2)(1,0,1)[14] with non-zero mean : Inf
##  ARIMA(1,0,2)(2,0,0)[14] with zero mean     : 39123.58
##  ARIMA(1,0,2)(2,0,0)[14] with non-zero mean : 39125.58
##  ARIMA(1,0,3)            with zero mean     : 42605.35
##  ARIMA(1,0,3)            with non-zero mean : 42607.35
##  ARIMA(1,0,3)(0,0,1)[14] with zero mean     : Inf
##  ARIMA(1,0,3)(0,0,1)[14] with non-zero mean : Inf
##  ARIMA(1,0,3)(1,0,0)[14] with zero mean     : 39952.21
##  ARIMA(1,0,3)(1,0,0)[14] with non-zero mean : 39954.21
##  ARIMA(1,0,4)            with zero mean     : 42600.98
##  ARIMA(1,0,4)            with non-zero mean : 42602.98
##  ARIMA(2,0,0)            with zero mean     : 42611.65
##  ARIMA(2,0,0)            with non-zero mean : 42613.65
##  ARIMA(2,0,0)(0,0,1)[14] with zero mean     : Inf
##  ARIMA(2,0,0)(0,0,1)[14] with non-zero mean : Inf
##  ARIMA(2,0,0)(0,0,2)[14] with zero mean     : Inf
##  ARIMA(2,0,0)(0,0,2)[14] with non-zero mean : Inf
##  ARIMA(2,0,0)(1,0,0)[14] with zero mean     : 39961.34
##  ARIMA(2,0,0)(1,0,0)[14] with non-zero mean : 39963.34
##  ARIMA(2,0,0)(1,0,1)[14] with zero mean     : Inf
##  ARIMA(2,0,0)(1,0,1)[14] with non-zero mean : Inf
##  ARIMA(2,0,0)(1,0,2)[14] with zero mean     : Inf
##  ARIMA(2,0,0)(1,0,2)[14] with non-zero mean : Inf
##  ARIMA(2,0,0)(2,0,0)[14] with zero mean     : Inf
##  ARIMA(2,0,0)(2,0,0)[14] with non-zero mean : Inf
##  ARIMA(2,0,0)(2,0,1)[14] with zero mean     : Inf
##  ARIMA(2,0,0)(2,0,1)[14] with non-zero mean : Inf
##  ARIMA(2,0,1)            with zero mean     : 42601.93
##  ARIMA(2,0,1)            with non-zero mean : 42603.95
##  ARIMA(2,0,1)(0,0,1)[14] with zero mean     : Inf
##  ARIMA(2,0,1)(0,0,1)[14] with non-zero mean : Inf
##  ARIMA(2,0,1)(0,0,2)[14] with zero mean     : Inf
##  ARIMA(2,0,1)(0,0,2)[14] with non-zero mean : Inf
##  ARIMA(2,0,1)(1,0,0)[14] with zero mean     : 39951.86
##  ARIMA(2,0,1)(1,0,0)[14] with non-zero mean : 39953.86
##  ARIMA(2,0,1)(1,0,1)[14] with zero mean     : Inf
##  ARIMA(2,0,1)(1,0,1)[14] with non-zero mean : Inf
##  ARIMA(2,0,1)(2,0,0)[14] with zero mean     : 39124.9
##  ARIMA(2,0,1)(2,0,0)[14] with non-zero mean : 39126.91
##  ARIMA(2,0,2)            with zero mean     : 42602.32
##  ARIMA(2,0,2)            with non-zero mean : 42604.32
##  ARIMA(2,0,2)(0,0,1)[14] with zero mean     : Inf
##  ARIMA(2,0,2)(0,0,1)[14] with non-zero mean : Inf
##  ARIMA(2,0,2)(1,0,0)[14] with zero mean     : 39950.9
##  ARIMA(2,0,2)(1,0,0)[14] with non-zero mean : 39952.9
##  ARIMA(2,0,3)            with zero mean     : 42604.28
##  ARIMA(2,0,3)            with non-zero mean : 42606.28
##  ARIMA(3,0,0)            with zero mean     : 42602.74
##  ARIMA(3,0,0)            with non-zero mean : 42604.75
##  ARIMA(3,0,0)(0,0,1)[14] with zero mean     : Inf
##  ARIMA(3,0,0)(0,0,1)[14] with non-zero mean : Inf
##  ARIMA(3,0,0)(0,0,2)[14] with zero mean     : Inf
##  ARIMA(3,0,0)(0,0,2)[14] with non-zero mean : Inf
##  ARIMA(3,0,0)(1,0,0)[14] with zero mean     : 39949.77
##  ARIMA(3,0,0)(1,0,0)[14] with non-zero mean : 39951.77
##  ARIMA(3,0,0)(1,0,1)[14] with zero mean     : Inf
##  ARIMA(3,0,0)(1,0,1)[14] with non-zero mean : Inf
##  ARIMA(3,0,0)(2,0,0)[14] with zero mean     : 39122.98
##  ARIMA(3,0,0)(2,0,0)[14] with non-zero mean : 39124.98
##  ARIMA(3,0,1)            with zero mean     : 42602.3
##  ARIMA(3,0,1)            with non-zero mean : 42604.3
##  ARIMA(3,0,1)(0,0,1)[14] with zero mean     : Inf
##  ARIMA(3,0,1)(0,0,1)[14] with non-zero mean : Inf
##  ARIMA(3,0,1)(1,0,0)[14] with zero mean     : 39950.86
##  ARIMA(3,0,1)(1,0,0)[14] with non-zero mean : 39952.91
##  ARIMA(3,0,2)            with zero mean     : Inf
##  ARIMA(3,0,2)            with non-zero mean : Inf
##  ARIMA(4,0,0)            with zero mean     : 42604.49
##  ARIMA(4,0,0)            with non-zero mean : 42606.49
##  ARIMA(4,0,0)(0,0,1)[14] with zero mean     : Inf
##  ARIMA(4,0,0)(0,0,1)[14] with non-zero mean : Inf
##  ARIMA(4,0,0)(1,0,0)[14] with zero mean     : 39951.51
##  ARIMA(4,0,0)(1,0,0)[14] with non-zero mean : 39953.51
##  ARIMA(4,0,1)            with zero mean     : Inf
##  ARIMA(4,0,1)            with non-zero mean : Inf
##  ARIMA(5,0,0)            with zero mean     : 42600.07
##  ARIMA(5,0,0)            with non-zero mean : 42602.07
## 
## 
## 
##  Best model: ARIMA(3,0,0)(2,0,0)[14] with zero mean
sarima_model
## Series: ts_data 
## ARIMA(3,0,0)(2,0,0)[14] with zero mean 
## 
## Coefficients:
##          ar1     ar2      ar3     sar1     sar2
##       0.6916  0.0509  -0.0337  -0.6013  -0.2745
## s.e.  0.0097  0.0118   0.0097   0.0094   0.0093
## 
## sigma^2 = 2.311:  log likelihood = -19555.48
## AIC=39122.97   AICc=39122.98   BIC=39166.6
plot(acf(sarima_model$residuals), main = "ACF for ARIMA(3,0,0)(2,0,0)[14] Residuals")

plot(pacf(sarima_model$residuals), main = "PACF for ARIMA(3,0,0)(2,0,0)[14] Residuals")

checkresiduals(sarima_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,0,0)(2,0,0)[14] with zero mean
## Q* = 321.93, df = 23, p-value < 2.2e-16
## 
## Model df: 5.   Total lags used: 28
summary(sarima_model)
## Series: ts_data 
## ARIMA(3,0,0)(2,0,0)[14] with zero mean 
## 
## Coefficients:
##          ar1     ar2      ar3     sar1     sar2
##       0.6916  0.0509  -0.0337  -0.6013  -0.2745
## s.e.  0.0097  0.0118   0.0097   0.0094   0.0093
## 
## sigma^2 = 2.311:  log likelihood = -19555.48
## AIC=39122.97   AICc=39122.98   BIC=39166.6
## 
## Training set error measures:
##                       ME     RMSE       MAE MPE MAPE      MASE         ACF1
## Training set 0.000550437 1.519956 0.9709748 NaN  Inf 0.4051865 9.730008e-05
forecasted_diff_sarima <- forecast(sarima_model,fc_period)
prediction_data_arima[, prediction_diff_model6 := forecasted_diff_sarima$mean]
prediction_data_arima[, prediction_model6 := prediction_diff_model6 + Lag14]
prediction_data_arima[, residual_model6 := actual - prediction_model6]
ggplot(prediction_data_arima, aes(x = datetime)) + geom_line(aes(y = actual, color = "actual")) + geom_line(aes(y = prediction_model6, color = "model 6"))

sum_error6 = sum(abs(prediction_data_arima$residual_model6))
wmape_df = rbind(wmape_df, c("Model 6", sum_error6/sum_actual, "ARIMA(3,0,0)(2,0,0)[14]"))
wmape_df
Residuals <- data_frame(model_1 = prediction_data_tsr$residual_model1,model_2 = prediction_data_tsr$residual_model2, model_3 = prediction_data_tsr$residual_model3, model_4 = prediction_data_tsr$residual_model4, model_5 = prediction_data_arima$residual_model5, model_6 = prediction_data_arima$residual_model6)
library(tidyr)
Residual_model <- Residuals %>%
  pivot_longer(cols = starts_with("model"), names_to = "model", values_to = "residual")
ggplot(Residual_model, aes(x = residual, y = model, fill = model)) +geom_density_ridges() + labs(title  = "Residual Distributions of Each Model")
## Picking joint bandwidth of 0.214